# Summary: Creates SI E (Tables E.8, E.9 and E.10); each table is written as a text file to ./SI_E_Results/

##############################################
#----------------- SI E ---------------------#
##############################################

# Print numbers in fixed notation: a scipen penalty this large effectively
# disables scientific notation in the printed tables.
options(scipen=999)
# NOTE: the former `rm(list = ls())` call was removed. It deleted every object
# in the caller's global environment whenever this file was source()d, yet it
# did not give a clean session (attached packages and options set elsewhere
# are left untouched). Nothing below depends on an empty workspace; run the
# script in a fresh R session for a clean replication.

# Load libraries
library(tidyverse)
library(lmtest)
library(sandwich)
library(margins)
library(DescTools)
library(insight)

# Set working directory
# setwd('~/Dataverse/')

# Load datasets
# load() restores objects under the names they were saved with and invisibly
# returns those names. The tables below read `df` and `df_long`. `df` is also
# the name of a function in the stats package, so if the data object were not
# restored the script would silently pick up that function and fail much later
# with an obscure error. Verify the expected names up front instead.
objects_in_df_file = load(file = "./Data/df.RData")
objects_in_df_long_file = load(file = "./Data/df_long.RData")

missing_objects = setdiff(c("df", "df_long"),
                          union(objects_in_df_file, objects_in_df_long_file))
if (length(missing_objects) > 0) {
  stop("Expected object(s) not found in the loaded .RData files: ",
       paste(missing_objects, collapse = ", "),
       call. = FALSE)
}
rm(objects_in_df_file, objects_in_df_long_file, missing_objects)

#############
# TABLE E.8 #
#############

# Table E.8: logit of considered_running on race-gender group plus controls,
# refit once per reference group. With reference[i] as the baseline level,
# each racegender coefficient is the log-odds contrast between another group
# and reference[i]. Standard errors are heteroskedasticity-robust (HC1).
test_diff_coeff = df

reference = c("Black Women","Black Men","Hispanic Women","Hispanic Men","White Women","White Men")

# One slot per reference group, filled in the loop below
values_ls = vector("list", length(reference))

for(i in seq_along(reference)){

  # Make reference[i] the omitted (baseline) category
  test_diff_coeff$racegender = relevel(as.factor(test_diff_coeff$racegender), ref=reference[i])

  model = glm(considered_running ~ racegender +
                political_interest +
                n_offices_qualified +
                partyID +
                encouraged +
                AGE7 +
                education +
                income +
                married,
              data = test_diff_coeff,
              family = binomial)

  model_robust = coeftest(model, vcov. = vcovHC(model, type="HC1"))

  # Keep only the race-gender contrasts. Rows are selected by coefficient name
  # instead of by position (previously rows 2:6), so a missing level or a
  # dropped coefficient cannot silently pull in a control variable's row.
  # drop = FALSE keeps the matrix shape even when a single row matches.
  is_group_row = startsWith(rownames(model_robust), "racegender")
  d = as.data.frame(model_robust[is_group_row, , drop = FALSE])
  d$reference = reference[i]
  d$group = rownames(d)
  rownames(d) = NULL
  values_ls[[i]] = d

}

# Stack the per-reference results into one long table
values = do.call(rbind.data.frame, values_ls)
values$Estimate = round(values$Estimate, 3)
values$`Std. Error` = round(values$`Std. Error`, 3)

# Significance stars; later assignments overwrite earlier ones, so the
# strictest threshold a p-value satisfies wins. p >= 0.05 stays NA.
values$sig = NA
values$sig[values$`Pr(>|z|)`<0.05] = "*"
values$sig[values$`Pr(>|z|)`<0.01] = "**"
values$sig[values$`Pr(>|z|)`<0.001] = "***"

# Make sure the output directory exists before redirecting output into it
dir.create("./SI_E_Results", showWarnings = FALSE, recursive = TRUE)

sink("./SI_E_Results/tableE8.txt")
print("TABLE E.8: Logistic Model: Difference in Coefficients Across Groups")
# Explicit print(): a bare `values` is only auto-printed at the interactive
# prompt or under Rscript, not when the file is run through source(), which
# would leave the table body out of the output file.
print(values)
sink()


#############
# TABLE E.9 #
#############

# Table E.9: same contrast-by-releveling approach as a single logit, but on
# office x race-gender cells built from df_long. Each model is refit with one
# cell as the baseline; only contrasts between cells of the SAME office are
# kept. Standard errors are clustered on CaseId.
offices = c("School Board","City Council","Mayor",
            "State Legislature","Governor",
            "US House","US Senate","President")

# NOTE(review): inside mutate(), `offices` resolves to a column of df_long if
# one exists; otherwise it falls back to the 8-element vector defined above,
# which paste0() would recycle down the rows. Confirm df_long has an `offices`
# column holding each row's office.
test_diff_coeff_m2 = df_long %>%
  mutate(racegender_office = paste0(offices,"_",racegender))

# Every office x race-gender combination, as "<office>_<racegender>" labels
reference = expand_grid(offices=offices,racegender=unique(test_diff_coeff_m2$racegender))

reference = sort(paste0(reference$offices,"_",reference$racegender))

# One slot per reference cell, filled in the loop below
values_ls = vector("list", length(reference))

for(i in seq_along(reference)){

  # Make reference[i] the omitted (baseline) cell
  test_diff_coeff_m2$racegender_office = relevel(as.factor(test_diff_coeff_m2$racegender_office), ref=reference[i])

  model = glm(considered_running ~ racegender_office +
                political_interest +
                qualified_office +
                partyID +
                encouraged +
                AGE7 +
                education +
                income +
                married,
              data = test_diff_coeff_m2,
              family = binomial)

  # NOTE(review): `vcov = vcovCL` is not a documented vcovCL() argument and
  # appears to be swallowed by `...`; left in place to keep results unchanged.
  # Consider dropping it after confirming the output is identical.
  model_robust = coeftest(model, vcov. = vcovCL(model, vcov = vcovCL, cluster = ~CaseId))

  # Keep only the office x race-gender contrasts. Rows are selected by
  # coefficient name instead of by position (previously rows 2:48), so a
  # missing cell or a dropped coefficient cannot silently pull in a control
  # variable's row. drop = FALSE keeps the matrix shape for a single match.
  is_cell_row = startsWith(rownames(model_robust), "racegender_office")
  d = as.data.frame(model_robust[is_cell_row, , drop = FALSE])
  d$reference = reference[i]
  d$group = rownames(d)
  rownames(d) = NULL
  values_ls[[i]] = d

}

# Stack the per-reference results into one long table
values = do.call(rbind.data.frame, values_ls)

# reference looks like "<office>_<racegender>" and group like
# "racegender_office<office>_<racegender>". Extract the office from each,
# keep within-office contrasts only, then strip both labels down to the
# race-gender part.
values = values %>%
  mutate(ref_office = sub("_.*", "", reference),
         group_office = sub(".*office([^_]+)_.*", "\\1", group)) %>%
  dplyr::filter(ref_office==group_office) %>%
  mutate(reference = sub(".*_", "", reference),
         group = sub(".*_(.*)", "\\1", group))

values$Estimate = round(values$Estimate, 3)
values$`Std. Error` = round(values$`Std. Error`, 3)

# Significance stars; later assignments overwrite earlier ones, so the
# strictest threshold a p-value satisfies wins. p >= 0.05 stays NA.
values$sig = NA
values$sig[values$`Pr(>|z|)`<0.05] = "*"
values$sig[values$`Pr(>|z|)`<0.01] = "**"
values$sig[values$`Pr(>|z|)`<0.001] = "***"

# Make sure the output directory exists before redirecting output into it
dir.create("./SI_E_Results", showWarnings = FALSE, recursive = TRUE)

sink("./SI_E_Results/tableE9.txt")
print("TABLE E.9: Logistic Model: Difference in Coefficients Across Groups and Offices")
# One section per office, in the order of `offices`. The results are printed
# explicitly: bare expressions are not auto-printed inside a loop, nor when
# the file is run through source().
for(office_name in offices){
  print(office_name)
  print(values[values$ref_office==office_name,])
}
sink()


##############
# TABLE E.10 #
##############

# Table E.10: contrast-by-releveling on encouragement-source x race-gender
# cells. Each logit is refit with one cell as the baseline; only contrasts
# between cells with the SAME encouragement source are kept. Standard errors
# are heteroskedasticity-robust (HC1).
# NOTE(review): paste0() turns a missing `encouraged` or `racegender` into the
# literal string "NA", which would create extra "NA_..." cells instead of
# dropping those rows. Confirm neither column contains NA.
test_diff_coeff_m3 = df %>%
  mutate(encouraged_racegender = paste0(encouraged,"_",racegender))

# Every encouragement x race-gender combination, as "<encouraged>_<racegender>"
reference = expand_grid(encouraged=unique(test_diff_coeff_m3$encouraged),racegender=unique(test_diff_coeff_m3$racegender))

reference = sort(paste0(reference$encouraged,"_",reference$racegender))

# One slot per reference cell, filled in the loop below
values_ls = vector("list", length(reference))

for(i in seq_along(reference)){

  # Make reference[i] the omitted (baseline) cell
  test_diff_coeff_m3$encouraged_racegender = relevel(as.factor(test_diff_coeff_m3$encouraged_racegender), ref=reference[i])

  model = glm(considered_running ~ encouraged_racegender +
                political_interest +
                n_offices_qualified +
                partyID +
                AGE7 +
                education +
                income +
                married,
              data = test_diff_coeff_m3,
              family = binomial)

  model_robust = coeftest(model, vcov. = vcovHC(model, type="HC1"))

  # Keep only the encouragement x race-gender contrasts. Rows are selected by
  # coefficient name instead of by position (previously rows 2:24), so a
  # missing cell or a dropped coefficient cannot silently pull in a control
  # variable's row. drop = FALSE keeps the matrix shape for a single match.
  is_cell_row = startsWith(rownames(model_robust), "encouraged_racegender")
  d = as.data.frame(model_robust[is_cell_row, , drop = FALSE])
  d$reference = reference[i]
  d$group = rownames(d)
  rownames(d) = NULL
  values_ls[[i]] = d

}

# Stack the per-reference results into one long table
values = do.call(rbind.data.frame, values_ls)

# reference looks like "<encouraged>_<racegender>" and group like
# "encouraged_racegender<encouraged>_<racegender>". Extract the encouragement
# source from each, keep within-source contrasts only, then strip both labels
# down to the race-gender part.
values = values %>%
  mutate(ref_encouraged = sub("_.*", "", reference),
         group_encouraged = sub(".*racegender([^_]+)_.*", "\\1", group)) %>%
  dplyr::filter(ref_encouraged==group_encouraged) %>%
  mutate(reference = sub(".*_", "", reference),
         group = sub(".*_(.*)", "\\1", group))

values$Estimate = round(values$Estimate, 3)
values$`Std. Error` = round(values$`Std. Error`, 3)

# Significance stars; later assignments overwrite earlier ones, so the
# strictest threshold a p-value satisfies wins. p >= 0.05 stays NA.
values$sig = NA
values$sig[values$`Pr(>|z|)`<0.05] = "*"
values$sig[values$`Pr(>|z|)`<0.01] = "**"
values$sig[values$`Pr(>|z|)`<0.001] = "***"

# Section heading for each value of `encouraged`, in print order
encouraged_headings = c(
  "None" = "Not Encouraged",
  "Non-Political" = "Encouraged by Non-Political Source(s)",
  "Political" = "Encouraged by Political Source(s)",
  "Both" = "Encouraged by Political and Non-Political Source(s)"
)

# Make sure the output directory exists before redirecting output into it
dir.create("./SI_E_Results", showWarnings = FALSE, recursive = TRUE)

sink("./SI_E_Results/tableE10.txt")
print("TABLE E.10: Logistic Model: Difference in Coefficients Across Groups and Source of Encouragement")
# The results are printed explicitly: bare expressions are not auto-printed
# inside a loop, nor when the file is run through source().
for(encouraged_level in names(encouraged_headings)){
  print(encouraged_headings[[encouraged_level]])
  print(values[values$ref_encouraged==encouraged_level,])
}
sink()






